{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 4.5. Understanding the internals of NumPy to avoid unnecessary array copying"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def aid(x):\n",
    "    # This function returns the memory\n",
    "    # block address of an array.\n",
    "    return x.__array_interface__['data'][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(21535472, 21535480)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = np.zeros(3)\n",
    "aid(a), aid(a[1:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_data_base(arr):\n",
    "    \"\"\"For a given NumPy array, find the base array\n",
    "    that owns the actual data.\"\"\"\n",
    "    base = arr\n",
    "    while isinstance(base.base, np.ndarray):\n",
    "        base = base.base\n",
    "    return base\n",
    "\n",
    "\n",
    "def arrays_share_data(x, y):\n",
    "    return get_data_base(x) is get_data_base(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "False\n"
     ]
    }
   ],
   "source": [
    "print(arrays_share_data(a, a.copy()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "True\n"
     ]
    }
   ],
   "source": [
    "print(arrays_share_data(a, a[:1]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "32250112"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "a = np.zeros(10)\n",
    "ax = aid(a)\n",
    "ax"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b = a.copy()\n",
    "aid(b) == ax"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a *= 2\n",
    "aid(a) == ax"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "c = a * 2\n",
    "aid(c) == ax"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "4.85 ms ± 24 µs per loop (mean ± std. dev. of 7 runs,\n",
      "100 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit a = np.zeros(10000000)\n",
    "a *= 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "7.7 ms ± 105 µs per loop (mean ± std. dev. of 7 runs,\n",
      "100 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit a = np.zeros(10000000)\n",
    "b = a * 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = np.zeros((100, 100))\n",
    "ax = aid(a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "b = a.reshape((1, -1))\n",
    "aid(b) == ax"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "c = a.T.reshape((1, -1))\n",
    "aid(c) == ax"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "330 ns ± 0.517 ns per loop (mean ± std. dev. of 7 runs\n",
      "    1000000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%timeit b = a.reshape((1, -1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5 µs ± 5.68 ns per loop (mean ± std. dev. of 7 runs,\n",
      "    100000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%timeit a.T.reshape((1, -1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d = a.flatten()\n",
    "aid(d) == ax"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "e = a.ravel()\n",
    "aid(e) == ax"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2.3 µs ± 18.1 ns per loop (mean ± std. dev. of 7 runs,\n",
      "100000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%timeit a.flatten()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "199 ns ± 5.02 ns per loop (mean ± std. dev. of 7 runs,\n",
      "10000000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%timeit a.ravel()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "n = 1000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = np.arange(n)\n",
    "ac = a[:, np.newaxis]  # column vector\n",
    "ar = a[np.newaxis, :]  # row vector"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5.7 ms ± 42.6 µs per loop (mean ± std. dev. of 7 runs,\n",
      "100 loops each)\n"
     ]
    }
   ],
   "source": [
    "%timeit np.tile(ac, (1, n)) * np.tile(ar, (n, 1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "784 µs ± 2.39 µs per loop (mean ± std. dev. of 7 runs,\n",
      "1000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%timeit ar * ac"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "```\n",
    "offset = array.strides[0] * i1 + array.strides[1] * i2\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = np.random.rand(5000, 5000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2.91 µs ± 20 ns per loop (mean ± std. dev. of 7 runs,\n",
      "    100000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%timeit a[0, :].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "33.7 µs ± 22.7 ns per loop (mean ± std. dev. of 7 runs\n",
      "    10000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%timeit a[:, 0].sum()"
   ]
  }
 ],
 "metadata": {},
 "nbformat": 4,
 "nbformat_minor": 2
}
